'''
@Time    : 2022/4/2 17:46
@Author  : Fu Junyu
@Site    : www.fujunyu.cn
@File    : bert_text2vec.py
@Software: PyCharm
'''

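r'''
Use BERT to convert the text attribute of each movie into a vector; each vector has length 768.
Requires bert-serving-server 1.10, bert-serving-client 1.10 and tensorflow 1.14 to be installed.
The bert_serving server must be running before this step is executed.
Start command: bert-serving-start -model_dir E:\cased_L-12_H-768_A-12 -num_worker=1 -max_seq_len=100
'''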
import torch
from bert_serving.client import BertClient
from Data.datalodar import moviesFeatTest
def text2vec(item_file='../dataset/Ml-100k/u.item',
             out_file='../dataset/Ml-100k/textTensor/movieText.pt'):
    """Encode the text field of every movie with BERT and save the result.

    Reads the ``|``-separated item file, collects the second field of each
    non-blank line, sends all collected texts to a bert-serving server on
    ``localhost`` in a single ``encode`` call, converts the returned
    embeddings to a ``torch`` tensor and writes it with ``torch.save``.

    A bert-serving server must already be running; otherwise the client
    call will not complete.

    Args:
        item_file: Path of the item file, read as ISO-8859-1 text.
        out_file: Destination of the saved tensor. Missing parent
            directories are created.

    Raises:
        IndexError: If a non-blank line contains no ``|`` separator and
            therefore has no second field.
        OSError: If ``item_file`` cannot be read or ``out_file`` cannot be
            written.
    """
    # Local import so this block does not depend on the file-level imports.
    import os

    movie_texts = []
    with open(item_file, "r", encoding='ISO-8859-1') as f:
        for line in f:
            # Lines keep their trailing newline, so a length check can never
            # detect a blank line; test the stripped content instead.
            if not line.strip():
                continue
            fields = line.strip('\n').split('|')
            movie_texts.append(fields[1])

    # Version/length checks are disabled exactly as before; the connection is
    # always released, even when encoding fails.
    bc = BertClient(ip='localhost', check_version=False, check_length=False)
    try:
        embeddings = torch.tensor(bc.encode(movie_texts))
    finally:
        bc.close()

    # torch.save does not create directories, so make sure the target exists.
    out_dir = os.path.dirname(out_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    torch.save(embeddings, out_file)


# Run the conversion only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    text2vec()


